library(data.table)
library(scales)
library(ggplot2)
library(stargazer) 
library(readxl)
library(stringr)
library(ggthemes)
library(rstudioapi)
library(bookdown)
library(gridExtra)
library(lubridate)
library(kableExtra)
library(pracma)
library(tseries)
library(stringr)
library(RQuantLib)
library(latex2exp)
library(tikzDevice)
library(zoo)

rm(list=ls())
options(max.print=999999)



# Sets the current path
current_path <- "XXX"

# Load summary stats
summ_stats_all <- fread(paste0(current_path,"/data/wrds_summary_statistics/summary_statistics_FISDrating_dropunusuralbonds_meangrade_0409.csv"))

setnames(summ_stats_all,"trd_exctn_dt","DATE")
summ_stats_all[,DATE:=ymd(DATE)]

all_DATES <- seq(min(summ_stats_all$DATE),max(summ_stats_all$DATE),by="1 day")
indic_B <- isBusinessDay(calendar="UnitedStates",all_DATES)
all_B_DATES<-all_DATES[indic_B]

# Remove weekends and holidays
summ_stats_all <- summ_stats_all[DATE %in% all_B_DATES]

summ_stats_EOD <- fread(paste0(current_path,"/data/wrds_summary_statistics/logit_estimation_data/SumStats_combine_TRACE_standard_EOD_Jan_June.csv"))
AllCombine_marketsentiment_wide<-fread(paste0(current_path,"/market_sentiment/data_inventory_plots/CumulativeInventory_sinceFeb19_Norm_MarketSentiment_Jan_June_0609.csv"))

summ_stats_EOD[,DATE:=ymd(DATE)]
all_DATES <- seq(min(summ_stats_EOD$DATE),max(summ_stats_EOD$DATE),by="1 day")
indic_B <- isBusinessDay(calendar="UnitedStates",all_DATES)
all_B_DATES<-all_DATES[indic_B]

# Remove weekends and holidays
summ_stats_EOD <- summ_stats_EOD[DATE %in% all_B_DATES]

# The start and end date for the series
start_date_EOD <- ymd("2020-01-19")
end_date_EOD   <- ymd("2020-07-01")

# Remove weekends and holidays
summ_stats_EOD <- summ_stats_EOD[DATE >= start_date_EOD & DATE<=end_date_EOD]



# The start and end date for the series
start_date <- ymd("2020-02-19")
end_date   <- ymd("2020-03-30")

# The key dates we want to highlight all along
X_axis_breaks<-as.Date(c("2020-02-19","2020-03-05","2020-03-09","2020-03-18","2020-03-23","2020-03-30"))
X_axis_labels<-format(X_axis_breaks,format = "%b-%d")


X_axis_breaks_EOD <- as.Date(c("2020-01-19","2020-02-19","2020-03-05","2020-03-18","2020-03-23",
                               "2020-04-09","2020-05-12","2020-06-16","2020-06-29"))

X_axis_labels_EOD <- format(X_axis_breaks_EOD,format = "%b-%d")


# Figure width and height
fheight<-4
fwidth <-6.4
# Figure text size
ftxtsize<-8 

# The cutoff date for calculating adjustments
cutoff <- ymd("2020-03-09")

#Pick up dates in the time window
summ_stats <- summ_stats_all[DATE<=end_date & DATE>= start_date]


# Parameters for figure
mahyarblue <- rgb(0,0.447,0.741)
mahyarred <- rgb(0.85,0.325,0.098)
mahyargreen <- rgb(0.1,0.6,0.4)
mahyarorange <- rgb(1,0.7,0.2)
annotate_vline2 <- 0.7 
annotate_vline1 <- 0.65

#### Figure 1 ####
plot <- ggplot() + 
  geom_line(data = summ_stats_EOD,aes(x=as.Date(DATE),y=wavemean_spread_CH_bondday_bps, color = "CH",linetype = "CH"),size = 1.4) +
  geom_line(data = summ_stats_EOD,aes(x=as.Date(DATE),y=wavemean_wave_daily_irc_byvol_all_bps,color = "MIRC",linetype = "MIRC"),size = 1.4) +
  geom_line(data = summ_stats_EOD,aes(x=as.Date(DATE),y=wavemean_spread_CH_bondday_bps-wavemean_wave_daily_irc_byvol_all_bps,
                                      color = "diff",linetype = "diff"),size = 0.8) +
  xlab("") + ylab("Transaction cost (bps)") + theme_bw() +
  geom_text(aes(x=as.Date("2020-02-19"), label="stock market peak", y = 180, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-05"), label="stock market trough", y = 180, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-18"), label="PDCF", y = 70, vjust=-0.3), colour="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-23"), label="P/SMCCF announced", y = 70, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-04-09"), label="P/SMCCF expanded", y = 180, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-05-12"), label="SMCCF began buying ETFs", y = 180, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-06-16"), label="SMCCF began buying bonds", y = 180, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-06-29"), label="PMCCF began operating", y = 180, vjust=-0.3), color="gray40", angle=90) +
  # scale_x_date(breaks = seq(start_date, end_date, by="4 days"), labels=date_format("%b-%d"))+
  scale_y_continuous(breaks = seq(0,275,by = 50))+
  scale_colour_manual("",breaks = c("CH","MIRC","diff"),
                      values = c("CH"= mahyarred,"MIRC"=mahyarblue, "diff"=mahyargreen),
                      labels =  c("Risky-Principal (high-quality)","Agency (low-quality)","difference")) +
  scale_linetype_manual("",breaks = c("CH","MIRC","diff"),
                        values = c("CH"=1,"MIRC"= 1,"diff"=12),
                        labels =  c("Risky-Principal (high-quality)","Agency (low-quality)","difference")) +
  theme(axis.text.x = element_text(size=ftxtsize,face="plain"), axis.text.y = element_text(size=ftxtsize,face="plain"),
        axis.title.x = element_text(size=ftxtsize,face="plain"), axis.title.y = element_text(size=ftxtsize,face="plain"),
        legend.text = element_text(size=ftxtsize,face="plain")) + 
  theme(legend.key.size =  unit(0.64, "in"),legend.key.height =  unit(0.2, "in"),
        legend.key.width =  unit(0.4, "in"),legend.position="bottom",
        legend.spacing.x = unit(.6, 'cm'),legend.margin=margin(t=-10)) +
  scale_x_date(breaks = as.Date(X_axis_breaks_EOD) , labels = X_axis_labels_EOD)+
  theme(axis.text.x = element_text(angle = 45, vjust = 0.6)) + 
  geom_vline(xintercept=X_axis_breaks_EOD, color="gray45", size=0.6,linetype=2) 
print(plot)



#### Figure 2 ####
scaleFactor <- max(summ_stats_EOD$prop_agency_num) / max(summ_stats_EOD$prop_agency_vol)

summ_stats_EOD[,prop_agency_num_MA:=movavg(prop_agency_num,10,"r")]
summ_stats_EOD[,prop_agency_vol_MA:=movavg(prop_agency_vol,10,"r")]

plot <- ggplot() + 
  geom_line(data = summ_stats_EOD,aes(x=as.Date(DATE),y=prop_agency_num,   color = "number",linetype = "number"),size = 0.7) +
  geom_line(data = summ_stats_EOD,aes(x=as.Date(DATE),y=prop_agency_num_MA,   color = "numberMA",linetype = "numberMA"),size = 1.4) +
  geom_line(data = summ_stats_EOD,aes(x=as.Date(DATE),y=prop_agency_vol*scaleFactor, color = "volume" ,linetype = "volume") ,size = 0.7) +
  geom_line(data = summ_stats_EOD,aes(x=as.Date(DATE),y=prop_agency_vol_MA*scaleFactor, color = "volumeMA" ,linetype = "volumeMA") ,size = 1.4) +
  scale_y_continuous(name ="Proportion of agency trades by number of trades", 
                     labels = scales:: label_number_auto(),
                     sec.axis=sec_axis(~./scaleFactor, name="Proportion of agency trades by volume of trades",
                                       labels = scales:: label_number_auto())) +
  xlab("") +  theme_bw() + 
  scale_colour_manual("",breaks = c("number","numberMA","volume","volumeMA"),
                      values = c("number"= mahyarblue,"numberMA"=mahyarblue,
                                 "volume"=mahyarred,"volumeMA"=mahyarred),
                      labels = c("Number","10-day MA of Number","Volume","10-day MA of Volume ")) +
  scale_linetype_manual("",breaks = c("number","numberMA","volume","volumeMA"),
                        values = c("number"=2,"numberMA"=1,"volume"= 2,"volumeMA"=1),
                        labels = c("Number","10-day MA of Number","Volume","10-day MA of Volume ")) +
  theme(
    axis.title.y.left=element_text(color=mahyarblue),
    axis.text.y.left=element_text(color=mahyarblue),
    axis.title.y.right=element_text(color=mahyarred),
    axis.text.y.right=element_text(color=mahyarred)
  ) + 
  theme(axis.text.x = element_text(size=ftxtsize,face="plain"), 
        axis.text.y = element_text(size=ftxtsize,face="plain"),
        axis.title.x = element_text(size=ftxtsize,face="plain"), 
        axis.title.y = element_text(size=ftxtsize,face="plain"),
        legend.text = element_text(size=ftxtsize,face="plain")) + 
  theme(legend.key.size =  unit(0.64, "in"),
        legend.key.height =  unit(0.2, "in"),legend.key.width =  unit(0.4, "in"),
        legend.position="bottom",legend.spacing.x = unit(0.5, 'cm'),legend.margin=margin(t=-10)) +
  scale_x_date(breaks = as.Date(X_axis_breaks_EOD) , labels = X_axis_labels_EOD)+
  theme(axis.text.x = element_text(angle = 45, vjust = 0.6)) + 
  geom_vline(xintercept=X_axis_breaks_EOD, color="gray45", size=0.6,linetype=2) 
print(plot)


#### Figure 3 ####
DD <- fread(paste0(current_path,"/market_sentiment/merged_market_sentiment.csv"))


start_date <- ymd("20-02-19")
end_date   <- ymd("20-10-08")
DD <- DD[as.Date(DATE)>=start_date & as.Date(DATE)<=end_date]


DD[,CumInv_InvestmentGrade :=cumsum(TotalVolume_DealerBuyFromCustomer_InvestmentGrade-TotalVolume_DealerSellToCustomer_InvestmentGrade)]

DD[,CumInv_HighYield :=cumsum(TotalVolume_DealerBuyFromCustomer_HighYield-TotalVolume_DealerSellToCustomer_HighYield)]

DD[,CumInv_AllSecurities :=cumsum(TotalVolume_DealerBuyFromCustomer_AllSecurities-TotalVolume_DealerSellToCustomer_AllSecurities)]

DD$CumInv_AllSecurities <- DD$CumInv_AllSecurities - DD$CumInv_AllSecurities[1]

DD$CumInv_HighYield <- DD$CumInv_HighYield - DD$CumInv_HighYield[1]
DD$CumInv_InvestmentGrade <- DD$CumInv_InvestmentGrade - DD$CumInv_InvestmentGrade[1]

X_axis_breaks<-as.Date(c("2020-02-19","2020-03-05","2020-03-18","2020-03-23", "2020-04-09",
                         "2020-05-12","2020-06-16","2020-06-29"))

X_axis_labels<-format(X_axis_breaks,format = "%b-%d")

plot <- ggplot() +
  geom_line(data = DD[DATE<="2020-07-01"],aes(x=as.Date(DATE),y=CumInv_AllSecurities/1000) ,color=mahyarblue,size = 1.4) +
  xlab("") + ylab("billions of USD") + theme_bw() +
  scale_y_continuous(
    name ="Cumulative inventory (\\$billion)",
    # Add a second axis and specify its features
    sec.axis = sec_axis(~.*0.000135, name="As a fraction of outstanding supply (\\%)",
                        labels = scales::label_number_auto()))+
  geom_text(aes(x=as.Date("2020-02-19"), label="stock market peak", y = 50, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-05"), label="stock market trough", y = 50, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-18"), label="PDCF", y = 50, vjust=-0.3), colour="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-23"), label="P/SMCCF announced", y = 50, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-04-09"), label="P/SMCCF expanded", y = 50, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-05-12"), label="SMCCF began buying ETFs", y = 10, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-06-16"), label="SMCCF began buying bonds", y = 10, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-06-29"), label="PMCCF began operating", y = 10, vjust=-0.3), color="gray40", angle=90) +
  theme(axis.text.x = element_text(size=ftxtsize,face="plain"), axis.text.y = element_text(size=ftxtsize,face="plain"),
        axis.title.x = element_text(size=ftxtsize,face="plain"), axis.title.y = element_text(size=ftxtsize,face="plain"),
        legend.text = element_text(size=ftxtsize,face="plain")) + 
  theme(legend.key.size =  unit(0.64, "in"),legend.key.height =  unit(0.2, "in"),legend.key.width =  unit(0.4, "in"),legend.position="bottom")+
  theme(axis.text.x = element_text(angle = 45, vjust = 0.6)) + 
  geom_vline(xintercept=X_axis_breaks, color="gray45", size=0.6,linetype=2) +
  scale_x_date(breaks = as.Date(X_axis_breaks) , labels = X_axis_labels) 

print(plot)


#### Figure 5 ####
start_date_cal <- ymd("20-01-01")
end_date_cal   <- ymd("20-08-28")
summ_stats_EOD <- fread(paste0(current_path,"/data/wrds_summary_statistics/logit_estimation_data/SumStats_combine_TRACE_standard_EOD_Jan_June.csv"))

# Change date format
summ_stats_EOD[,DATE:=ymd(DATE)]

# Remove weekends and holidays
all_DATES <- seq(min(summ_stats_EOD$DATE),max(summ_stats_EOD$DATE),by="1 day")
indic_B <- isBusinessDay(calendar="UnitedStates",all_DATES)
all_B_DATES<-all_DATES[indic_B]
summ_stats_EOD <- summ_stats_EOD[DATE %in% all_B_DATES]

DD <- summ_stats_EOD[,c("DATE","ph","pl","xl","daily_netinflow_vol","daily_vol_customer_sold")]
old_names = c("daily_netinflow_vol")
new_names <- c("Delta_I")
setnames(DD,old_names,new_names)
# set the key for merging
setkey(DD,"DATE")
DD_complete <- DD


# Extract the volume of "dealer buy from costumer" from the market sentiment series, our proxy for N
market_sentiment <- fread(paste0(current_path,"/market_sentiment/merged_market_sentiment.csv"))
DT <-  market_sentiment[,c("DATE","TotalVolume_DealerBuyFromCustomer_AllSecurities")]
old_names <- c("DATE","TotalVolume_DealerBuyFromCustomer_AllSecurities")
new_names <- c("DATE","N")
setnames(DT,old_names,new_names)
# Set key for merging
setkey(DT,"DATE")
DT[,`:=` (DATE=as.Date(DATE))]

# Merge the two data sets
DD <- merge(DD,DT,all.x=TRUE)

DDprior <- DD[DATE>=ymd("2016-01-01") & DATE<=start_date_cal] 
DDprior[, `:=` (N=na.approx(N))] #in 2019 there are missing data for volume
DD<- DD[DATE>=start_date_cal & DATE<=end_date_cal]
DD[, `:=` (N=na.approx(N))] #in 2019 there are missing data for volume



# Compares mean 2020 with prior years
DD19mean<-DDprior[,lapply(.SD,mean),by=month(DATE)]
DD19mean[,month:=month(month,label=TRUE)]
DD19mean[,period:="2016-2019"]

DD20mean<-DD[,lapply(.SD,mean),by=month(DATE)]
DD20mean[,month:=month(month,label=TRUE)]
DD20mean[,period:="2020"]

DDmean<-rbind(DD19mean,DD20mean)

p <- ggplot(data=DDmean[as.numeric(month)<=8], aes(x=month, y=N/1000)) +   geom_bar(aes(fill=period),position="dodge",stat="identity") + theme_bw() + #scale_fill_brewer(palette='Blues') + 
  scale_fill_manual("",values = c("gray69", mahyarblue)) +
  ylab("customer-to-dealer volume (billions of USD)") + xlab("month of the year") +
  theme(axis.text.x = element_text(size=ftxtsize,face="plain"), axis.text.y = element_text(size=ftxtsize,face="plain"),
        axis.title.x = element_text(size=ftxtsize,face="plain"), axis.title.y = element_text(size=ftxtsize,face="plain"),
        legend.text = element_text(size=ftxtsize,face="plain")) + 
  theme(axis.text.x = element_text(size=ftxtsize,face="plain"), axis.text.y = element_text(size=ftxtsize,face="plain"),
        axis.title.x = element_text(size=ftxtsize,face="plain"), axis.title.y = element_text(size=ftxtsize,face="plain"),
        legend.text = element_text(size=ftxtsize,face="plain")) + 
  theme(legend.key.size =  unit(0.64, "in"),legend.key.height =  unit(0.2, "in"),
        legend.key.width =  unit(0.4, "in"),legend.position="bottom",
        legend.spacing.x = unit(.6, 'cm'),legend.margin=margin(t=-10))
print(p)


#### Figure 7 ####
DD[, `:=` (xh=1-xl)] #proportion of risky principal trades
DD[, `:=` (Xh=N*xh)] #dollar volume of risky principal trades
DD[, `:=` (logXh = log(Xh))] #log dollar volume of risky principal trades
DD[, `:=` (Xl=N*xl)] #dollar volume of agency trades
DD[, `:=` (logXl = log(Xl))] #log dollar volume of agnecy trades
DD[, `:=` (logN = log(N))] #log dollar volume
DD[, `:=` (MRS=ph/pl)] #relative price of risky principal to agency trades
DD[, `:=` (logMRS = log(MRS))] #log relative price
DD[, `:=` (a= xl*pl+(1-xl)*ph)] #average transaction cost
#Lagged variables
DD[, `:=` (xh_lag = shift(xh,type="lag",fill=xh[1L]))]
DD[, `:=` (Xh_lag =  shift(Xh,type="lag",fill = Xh[1L]))]
DD[, `:=` (logXh_lag =  shift(logXh,type="lag",fill=logXh[1L]))]
DD[, `:=` (xl_lag =  shift(xl,type="lag",fill=xl[1L]))]
DD[, `:=` (Xl_lag =  shift(Xl,type="lag",fill=Xl[1L]))]
DD[, `:=` (logXl_lag =  shift(logXl,type="lag",fill=logXl[1L]))]
DD[, `:=` (N_lag =  shift(N,type="lag",fill=N[1L]))]
DD[, `:=` (logN_lag =  shift(logN,type="lag",fill=logN[1L]))]
DD[, `:=` (pl_lag =  shift(pl,type="lag",fill=pl[1L]))]
DD[, `:=` (ph_lag =  shift(ph,type="lag",fill=ph[1L]))]
DD[, `:=` (MRS_lag =  shift(MRS,type="lag",fill=MRS[1L]))]
DD[, `:=` (logMRS_lag =  shift(logMRS,type="lag",fill=logMRS[1L]))]
DD[, `:=` (a_lag =  shift(a,type="lag",fill=a[1L]))]

#Changes in variables
DD[, `:=` (dxh=ifelse(is.na(xh - xh_lag),0,xh - xh_lag))]
DD[, `:=` (dXh=ifelse(is.na(Xh - Xh_lag),0,Xh - Xh_lag))]
DD[, `:=` (dlogXh=ifelse(is.na(logXh - logXh_lag),0,logXh - logXh_lag))]
DD[, `:=` (dxl=ifelse(is.na(xl - xl_lag),0,xl - xl_lag))]
DD[, `:=` (dXl=ifelse(is.na(Xl - Xl_lag),0,Xh - Xl_lag))]
DD[, `:=` (dlogXl=ifelse(is.na(logXl - logXl_lag),0,logXl - logXl_lag))]
DD[, `:=` (dN=ifelse(is.na(N - N_lag),0,N - N_lag))]
DD[, `:=` (dlogN=ifelse(is.na(logN - logN_lag),0,logN - logN_lag))]
DD[, `:=` (dph=ifelse(is.na(ph - ph_lag),0,ph - ph_lag))]
DD[, `:=` (dpl=ifelse(is.na(pl - pl_lag),0,pl - pl_lag))]
DD[, `:=` (dMRS=ifelse(is.na(MRS - MRS_lag),0,MRS - MRS_lag))]
DD[, `:=` (dlogMRS=ifelse(is.na(logMRS - logMRS_lag),0,logMRS - logMRS_lag))]
DD[, `:=` (da=ifelse(is.na(a - a_lag),0,a - a_lag))]

# Moving averages for quantities
MA_param <- 10

DD[,`:=`(xl_MA=movavg(xl,MA_param ,"s") )] #moving average for agency trades proportion
DD[,`:=`(xh_MA=movavg(xh,MA_param ,"s") )] #moving average for risky principal trades proportion
DD[,`:=` (N_MA=movavg(N,MA_param ,"s"))] #moving average of volume
DD[,`:=`(Xh_MA=movavg(Xh,MA_param ,"s") )] #moving average for risky principal trades volume
DD[,`:=`(Xl_MA=movavg(Xl,MA_param ,"s") )] #moving average for risky principal trades volume

DD[, `:=` (xh_MA_lag = shift(xh_MA,type="lag",fill=xh[1L]))]
DD[, `:=` (Xh_MA_lag =  shift(Xh_MA,type="lag",fill = Xh[1L]))]
DD[, `:=` (xl_MA_lag =  shift(xl_MA,type="lag",fill=xl[1L]))]
DD[, `:=` (Xl_MA_lag =  shift(Xl_MA,type="lag",fill=Xl[1L]))]


#Changes in variables
DD[, `:=` (dxh_MA=xh_MA - xh_MA_lag)]
DD[, `:=` (dXh=Xh_MA - Xh_MA_lag)]
DD[, `:=` (dxl_MA=xl_MA - xl_MA_lag)]
DD[, `:=` (dXl_MA=Xl_MA - Xl_MA_lag)]




#Average transaction cost with MA for quantities
DD[, `:=` (a_MA= xl_MA*pl+xh_MA*ph)] #average transaction cost
DD[, `:=` (Da_MA= a_MA - a_MA[1])] # change in average transaction cost

#Surplus calculation, no MA for quantities, via Envelope

w_up <- 1 #weight given to the upper bound 
w_lo <- 1-w_up #weight given to the lower bound 

# The upper bound for the change in consumer surplus from one period to the next
DD[, `:=` (ds_up= -xl*dpl-xh*dph)]
DD[, `:=` (Ds_up=cumsum(ds_up))]
# The lower bound for the change in consumer surplus from one period to the next
DD[, `:=` (ds_lo= -xl_lag*dpl-xh_lag*dph)]
DD[, `:=` (Ds_lo=cumsum(ds_lo))]
# We use the convention that level variables are not lagged for calculating changes
# [this means that we use the upper bound]
DD[, `:=` (Ds = w_up*Ds_up+w_lo*Ds_lo) ]
DD[, `:=` (ds = w_up*ds_up+w_lo*Ds_lo) ]

# Surplus calculation, with MA for quantities, via Envelope

# The upper bound for the change in consumer surplus from one period to the next
DD[, `:=` (ds_MA_up= -xl_MA*dpl-xh_MA*dph)]
DD[, `:=` (Ds_MA_up=cumsum(ds_MA_up))]
# The lower bound for the change in consumer surplus from one period to the next
DD[, `:=` (ds_MA_lo= -xl_MA_lag*dpl-xh_MA_lag*dph)]
DD[, `:=` (Ds_MA_lo=cumsum(ds_MA_lo))]
# We use the convention that level variables are not lagged for calculating changes
# [this means that we use the upper bound]
DD[, `:=` (Ds_MA = w_up*Ds_MA_up+w_lo*Ds_MA_lo) ]
DD[, `:=` (ds_MA = w_up*ds_MA_up+w_lo*Ds_MA_lo) ]





# Change in utility
DD[, `:=` (du = dxl*pl + dxh*ph)]
DD[, `:=` (Du=cumsum(du))]
# Robustness check: used lagged level variables
DD[, `:=` (du_laglevel =dxl*pl_lag+dxh*ph_lag )]
DD[, `:=` (Du_laglevel=cumsum(du_laglevel))]


# Change in average transaction cost
# Average transaction cost
DD[, `:=` (Da= a - a[1])]


#Change in surplus, logit



#Estimate of sigma
#A is early January to mid February B is mid May to end June

sub_A <- DD[DATE>=ymd("2020-01-15") & DATE<=ymd("2020-02-15"),DATE]
N_A   <- length(sub_A)
mean_price_gap_A <- mean(DD[DATE %in% sub_A,ph-pl])
mean_log_trade_ratio_A <- mean(DD[DATE %in% sub_A,log(xh/xl)])
mean_log_trade_ratio_MA_A <- mean(DD[DATE %in% sub_A,log(xh_MA/xl_MA)])


sub_B <- DD[DATE>=ymd("2020-06-01") & DATE<=ymd("2020-06-30"),DATE]
N_B <- length(sub_B)
mean_price_gap_B <- mean(DD[DATE %in% sub_B,ph-pl])
mean_log_trade_ratio_B <- mean(DD[DATE %in% sub_B,log(xh/xl)])
mean_log_trade_ratio_MA_B <- mean(DD[DATE %in% sub_B,log(xh_MA/xl_MA)])

sigma_bp <- -(mean_price_gap_B-mean_price_gap_A)/(mean_log_trade_ratio_B-mean_log_trade_ratio_A) 
sigma_bp_MA <- -(mean_price_gap_B-mean_price_gap_A)/(mean_log_trade_ratio_MA_B-mean_log_trade_ratio_MA_A) 


# For the picture, the rectangles corresponding to sub period A and B

rectangle_A <- data.frame(xmin = min(sub_A),
                          xmax = max(sub_A),
                          ymin = -Inf, ymax = Inf)
rectangle_B <- data.frame(xmin = min(sub_B),
                          xmax = max(sub_B),
                          ymin = -Inf, ymax = Inf)



#bind together the de-meaned price gap for the two subperiods
de_mean_price_gap_A <- DD[DATE %in% sub_A,ph-pl] -mean_price_gap_A  
de_mean_price_gap_B <- DD[DATE %in% sub_B,ph-pl] -mean_price_gap_B  
de_mean_price_gap   <- c(de_mean_price_gap_A,de_mean_price_gap_B)

#bind together the de-meaned log trade ratio for the two subperiods
de_mean_log_trade_ratio_A <- DD[DATE %in% sub_A,log(xh/xl)] -mean_log_trade_ratio_A  
de_mean_log_trade_ratio_B <- DD[DATE %in% sub_B,log(xh/xl)] -mean_log_trade_ratio_B  
de_mean_log_trade_ratio   <- c(de_mean_log_trade_ratio_A,de_mean_log_trade_ratio_B)

#bind together the de-meaned log trade ratio MA for the two subperiods
de_mean_log_trade_ratio_MA_A <- DD[DATE %in% sub_A,log(xh_MA/xl_MA)] -mean_log_trade_ratio_MA_A  
de_mean_log_trade_ratio_MA_B <- DD[DATE %in% sub_B,log(xh_MA/xl_MA)] -mean_log_trade_ratio_MA_B  
de_mean_log_trade_ratio_MA   <- c(de_mean_log_trade_ratio_MA_A,de_mean_log_trade_ratio_MA_B)


#calculate the sample variance covariance matrix (one can probably do better...)
de_mean_matrix <- as.matrix(rbind(de_mean_price_gap,de_mean_log_trade_ratio))
sample_covar <- de_mean_matrix%*%t(de_mean_matrix)/(N_A+N_B-2)

#same with MA series
de_mean_matrix_MA <- as.matrix(rbind(de_mean_price_gap,de_mean_log_trade_ratio_MA))
sample_covar_MA <- de_mean_matrix_MA%*%t(de_mean_matrix_MA)/(N_A+N_B-2)

#calculate the standard error using the delta method
grad_f1 <- 1/(mean_log_trade_ratio_B-mean_log_trade_ratio_A) 
grad_f2 <- -(mean_price_gap_B-mean_price_gap_A)/(mean_log_trade_ratio_B-mean_log_trade_ratio_A)^2
grad_f <- as.matrix(c(grad_f1,grad_f2))
std_sigma_bp <- sqrt( t(grad_f)%*%sample_covar%*%grad_f*(1/N_A+1/N_B))

#same with MA series
grad_f1_MA <- -1/(mean_log_trade_ratio_MA_B-mean_log_trade_ratio_MA_A) 
grad_f2_MA <- (mean_price_gap_B-mean_price_gap_A)/(mean_log_trade_ratio_MA_B-mean_log_trade_ratio_MA_A)^2
grad_f_MA <- as.matrix(c(grad_f1_MA,grad_f2_MA))
std_sigma_bp_MA <- sqrt( t(grad_f)%*%sample_covar_MA%*%grad_f*(1/N_A+1/N_B))

sigma_bp_hi <- sigma_bp + 2*std_sigma_bp
sigma_bp_lo <- sigma_bp - 2*std_sigma_bp

DD[, `:=` (Ds_MA_logit= - sigma_bp*log(xl_MA/xl_MA[1]) - (pl-pl[1]))]
DD[, `:=` (Ds_MA_logit_hi= - (sigma_bp_hi)*log(xl_MA/xl_MA[1]) - (pl-pl[1]))]
DD[, `:=` (Ds_MA_logit_lo= - (sigma_bp_lo)*log(xl_MA/xl_MA[1]) - (pl-pl[1]))]


DD[, `:=` (dv = sigma_bp*dxh/xh/(1-xh) +xh*dph)]

DD[, `:=` (ds_logit_check= sigma_bp*dxh_MA/(1-xh_MA) - (pl-pl_lag))]
DD[, `:=` (Ds_logit_check= cumsum(ds_logit_check))]
#Decomposition of change in surplus, logit
#DD[, `:=` (ds_logit_pref = sigma_bp*dxh/(1-xh) + (dph-dpl)*xh)]
#DD[, `:=` (Ds_logit_pref = cumsum(ds_logit_pref))]

#Decomposition of change in surplus, logit
#Transaction costs
DD[, `:=` (ds_logit_cost = -da)]
DD[, `:=` (Ds_logit_cost = -a+a[1])]
#Composition
DD[, `:=` (ds_logit_comp = dxh_MA*(ph-pl))]
DD[, `:=` (Ds_logit_comp = cumsum(ds_logit_comp))]
#Preference
DD[, `:=` (ds_logit_pref = sigma_bp*dxh_MA/(1-xh_MA) + (dph-dpl)*xh_MA)]
DD[, `:=` (Ds_logit_pref = cumsum(ds_logit_pref))]
DD[, `:=` (ds_logit_pref_hi = sigma_bp_hi*dxh_MA/(1-xh_MA) + (dph-dpl)*xh_MA)]
DD[, `:=` (Ds_logit_pref_hi = cumsum(ds_logit_pref_hi))]
DD[, `:=` (ds_logit_pref_lo = sigma_bp_lo*dxh_MA/(1-xh_MA) + (dph-dpl)*xh_MA)]
DD[, `:=` (Ds_logit_pref_lo = cumsum(ds_logit_pref_lo))]




#Change in price gap

DD[, `:=` (Dprice_gap = (ph-pl)-(ph[1]-pl[1]) )]
DD[, `:=` (Dprice_gap_along_demand = -sigma_bp*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD[, `:=` (Dprice_gap_shifter = Dprice_gap- Dprice_gap_along_demand)]


DD[, `:=` (Dprice_gap_along_demand_hi = -(sigma_bp_hi)*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD[, `:=` (Dprice_gap_along_demand_lo = -(sigma_bp_lo)*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]


rectangle_A <- data.frame(xmin = min(sub_A),
                          xmax = max(sub_A),
                          ymin = -Inf, ymax = Inf)
rectangle_B <- data.frame(xmin = min(sub_B),
                          xmax = max(sub_B),
                          ymin = -Inf, ymax = Inf)
start_date_pic <- ymd("20-01-19")
end_date_pic <- ymd("20-07-01")

plot <- ggplot() +
  geom_line(data = DD[as.Date(DATE)>=start_date_pic & as.Date(DATE)<=end_date_pic ],aes(x=as.Date(DATE),y=Dprice_gap_shifter),
            color=mahyarblue, size = 1.4) + 
  geom_ribbon(data = DD[as.Date(DATE)>=start_date_pic & as.Date(DATE)<=end_date_pic ],
              aes(x=as.Date(DATE),ymin = Dprice_gap - Dprice_gap_along_demand_hi, 
                  ymax = Dprice_gap - Dprice_gap_along_demand_lo), alpha= 0.2,fill = "grey70") + 
  xlab("") + ylab("Change in relative demand shocks (bps)") + theme_bw() +
  geom_text(aes(x=as.Date("2020-02-19"), label="stock market peak", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-05"), label="stock market trough", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-18"), label="PDCF", y = 25, vjust=-0.3), colour="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-23"), label="P/SMCCF announced", y = 25, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-04-09"), label="P/SMCCF expanded", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-05-12"), label="SMCCF began buying ETFs", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-06-16"), label="SMCCF began buying bonds", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-06-29"), label="PMCCF began operating", y = 100, vjust=-0.3), color="gray40", angle=90) +
  theme(axis.text.x = element_text(size=ftxtsize,face="plain"), axis.text.y = element_text(size=ftxtsize,face="plain"),
        axis.title.x = element_text(size=ftxtsize,face="plain"), axis.title.y = element_text(size=ftxtsize,face="plain"),
        legend.text = element_text(size=ftxtsize,face="plain")) + 
  theme(legend.key.size =  unit(0.64, "in"),legend.key.height =  unit(0.2, "in"),
        legend.key.width =  unit(0.4, "in"),legend.position="bottom",
        legend.spacing.x = unit(.3, 'cm'),legend.margin=margin(t=-10))+ 
  theme(axis.text.x = element_text(angle = 45, vjust = 0.6)) + 
  geom_vline(xintercept=X_axis_breaks_EOD, color="gray45", size=0.6,linetype=2) +
  scale_x_date(breaks = as.Date(X_axis_breaks_EOD) , labels = X_axis_labels_EOD) + 
  geom_rect(data = rectangle_A, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
            fill = "darkseagreen4", alpha = 0.2) + 
  geom_rect(data = rectangle_B, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
            fill = "darkseagreen4", alpha = 0.2)


print(plot)


#### Figure 8 ####

# Start and end date for the calculations
# start_date_cal <- ymd("20-01-01")
start_date_cal <- ymd("20-01-15")
end_date_cal   <- ymd("20-08-28")

# Start and end date for the pictures  
start_date_pic <- ymd("20-01-19")
end_date_pic  <- ymd("20-07-01")

# Set the date breaks for the pictures
X_axis_breaks_EOD <- c(start_date_pic,as.Date(c("2019-12-19","2020-01-19","2020-02-19","2020-03-05","2020-03-18",
                                                "2020-03-23", "2020-04-09","2020-05-12","2020-06-16","2020-06-29")))
X_axis_labels_EOD <- format(X_axis_breaks_EOD,format = "%b-%d")




summ_stats_EOD <- fread(paste0(current_path,"/data/wrds_summary_statistics/logit_estimation_data/SumStats_combine_TRACE_standard_EOD_Jan_June.csv"))
summ_stats_EOD_eligible <- fread(paste0(current_path,"/data/wrds_summary_statistics/logit_estimation_data/EligibleBonds_standard_EOD_summarystatistics_04042021.csv"))
summ_stats_EOD_ineligible <- fread(paste0(current_path,"/data/wrds_summary_statistics/logit_estimation_data/IneligibleBonds_standard_EOD_summarystatistics_04042021.csv"))

# Change date format
summ_stats_EOD[,DATE:=ymd(DATE)]
summ_stats_EOD_eligible[,DATE:=ymd(DATE)]
summ_stats_EOD_ineligible[,DATE:=ymd(DATE)]

#Remove weekends and holidays
all_DATES <- seq(min(summ_stats_EOD$DATE),max(summ_stats_EOD$DATE),by="1 day")
indic_B <- isBusinessDay(calendar="UnitedStates",all_DATES)
all_B_DATES <- all_DATES[indic_B]
summ_stats_EOD <- summ_stats_EOD[DATE %in% all_B_DATES]


DD <- summ_stats_EOD[,c("DATE","ph","pl","xl","daily_netinflow_vol","daily_vol_customer_sold")]
DD_eligible <- summ_stats_EOD_eligible[,c("DATE","ph","pl","xl","daily_netinflow_vol","daily_vol_customer_sold")]
DD_ineligible <- summ_stats_EOD_ineligible[,c("DATE","ph","pl","xl","daily_netinflow_vol","daily_vol_customer_sold")]
old_names = c("daily_netinflow_vol")
new_names <- c("Delta_I")
setnames(DD,old_names,new_names)
setnames(DD_eligible,old_names,new_names)
setnames(DD_ineligible,old_names,new_names)

# set the key for merging
setkey(DD,"DATE")
DD_complete <- DD


# Extract the volume of "dealer buy from costumer" from the market sentiment series, our proxy for N
market_sentiment <- fread(paste0(current_path,"/market_sentiment/merged_market_sentiment.csv"))
DT <-  market_sentiment[,c("DATE","TotalVolume_DealerBuyFromCustomer_AllSecurities")]
old_names <- c("DATE","TotalVolume_DealerBuyFromCustomer_AllSecurities")
new_names <- c("DATE","N")
setnames(DT,old_names,new_names)
# Set key for merging
setkey(DT,"DATE")
DT[,`:=` (DATE=as.Date(DATE))]

# Merge the two data sets
DD <- merge(DD,DT,all.x=TRUE)
DD_eligible <- merge(DD_eligible,DT,all.x=TRUE)
DD_ineligible <- merge(DD_ineligible,DT,all.x=TRUE)

DDprior <- DD[DATE>=ymd("2016-01-01") & DATE<=start_date_cal] 
DDprior[, `:=` (N=na.approx(N))] #in 2019 there are missing data for volume
DD <- DD[DATE>=start_date_cal & DATE<=end_date_cal]
DD[, `:=` (N=na.approx(N))] #in 2019 there are missing data for volume

DD_eligible <- DD_eligible[DATE>=start_date_cal & DATE<=end_date_cal]
DD_eligible[, `:=` (N=na.approx(N))] #in 2019 there are missing data for volume

DD_ineligible <- DD_ineligible[!is.na(ph)]
DD_ineligible <- DD_ineligible[DATE>=start_date_cal & DATE<=end_date_cal]
DD_ineligible[, `:=` (N=na.approx(N))] #in 2019 there are missing data for volume

# Compares mean 2020 with prior years
DD19mean<-DDprior[,lapply(.SD,mean),by=month(DATE)]
DD19mean[,month:=month(month,label=TRUE)]
DD19mean[,period:="2016-2019"]

DD20mean<-DD[,lapply(.SD,mean),by=month(DATE)]
DD20mean[,month:=month(month,label=TRUE)]
DD20mean[,period:="2020"]

DDmean <- rbind(DD19mean,DD20mean)


# New variables 
DD[, `:=` (xh=1-xl)] #proportion of risky principal trades
DD[, `:=` (Xh=N*xh)] #dollar volume of risky principal trades
DD[, `:=` (logXh = log(Xh))] #log dollar volume of risky principal trades
DD[, `:=` (Xl=N*xl)] #dollar volume of agency trades
DD[, `:=` (logXl = log(Xl))] #log dollar volume of agnecy trades
DD[, `:=` (logN = log(N))] #log dollar volume
DD[, `:=` (MRS=ph/pl)] #relative price of risky principal to agency trades
DD[, `:=` (logMRS = log(MRS))] #log relative price
DD[, `:=` (a= xl*pl+(1-xl)*ph)] #average transaction cost

DD_eligible[, `:=` (xh=1-xl)] #proportion of risky principal trades
DD_eligible[, `:=` (Xh=N*xh)] #dollar volume of risky principal trades
DD_eligible[, `:=` (logXh = log(Xh))] #log dollar volume of risky principal trades
DD_eligible[, `:=` (Xl=N*xl)] #dollar volume of agency trades
DD_eligible[, `:=` (logXl = log(Xl))] #log dollar volume of agnecy trades
DD_eligible[, `:=` (logN = log(N))] #log dollar volume
DD_eligible[, `:=` (MRS=ph/pl)] #relative price of risky principal to agency trades
DD_eligible[, `:=` (logMRS = log(MRS))] #log relative price
DD_eligible[, `:=` (a= xl*pl+(1-xl)*ph)] #average transaction cost

DD_ineligible[, `:=` (xh=1-xl)] #proportion of risky principal trades
DD_ineligible[, `:=` (Xh=N*xh)] #dollar volume of risky principal trades
DD_ineligible[, `:=` (logXh = log(Xh))] #log dollar volume of risky principal trades
DD_ineligible[, `:=` (Xl=N*xl)] #dollar volume of agency trades
DD_ineligible[, `:=` (logXl = log(Xl))] #log dollar volume of agnecy trades
DD_ineligible[, `:=` (logN = log(N))] #log dollar volume
DD_ineligible[, `:=` (MRS=ph/pl)] #relative price of risky principal to agency trades
DD_ineligible[, `:=` (logMRS = log(MRS))] #log relative price
DD_ineligible[, `:=` (a= xl*pl+(1-xl)*ph)] #average transaction cost



# Lagged variables
DD[, `:=` (xh_lag = shift(xh,type="lag",fill=xh[1L]))]
DD[, `:=` (Xh_lag =  shift(Xh,type="lag",fill = Xh[1L]))]
DD[, `:=` (logXh_lag =  shift(logXh,type="lag",fill=logXh[1L]))]
DD[, `:=` (xl_lag =  shift(xl,type="lag",fill=xl[1L]))]
DD[, `:=` (Xl_lag =  shift(Xl,type="lag",fill=Xl[1L]))]
DD[, `:=` (logXl_lag =  shift(logXl,type="lag",fill=logXl[1L]))]
DD[, `:=` (N_lag =  shift(N,type="lag",fill=N[1L]))]
DD[, `:=` (logN_lag =  shift(logN,type="lag",fill=logN[1L]))]
DD[, `:=` (pl_lag =  shift(pl,type="lag",fill=pl[1L]))]
DD[, `:=` (ph_lag =  shift(ph,type="lag",fill=ph[1L]))]
DD[, `:=` (MRS_lag =  shift(MRS,type="lag",fill=MRS[1L]))]
DD[, `:=` (logMRS_lag =  shift(logMRS,type="lag",fill=logMRS[1L]))]
DD[, `:=` (a_lag =  shift(a,type="lag",fill=a[1L]))]

DD_eligible[, `:=` (xh_lag = shift(xh,type="lag",fill=xh[1L]))]
DD_eligible[, `:=` (Xh_lag =  shift(Xh,type="lag",fill = Xh[1L]))]
DD_eligible[, `:=` (logXh_lag =  shift(logXh,type="lag",fill=logXh[1L]))]
DD_eligible[, `:=` (xl_lag =  shift(xl,type="lag",fill=xl[1L]))]
DD_eligible[, `:=` (Xl_lag =  shift(Xl,type="lag",fill=Xl[1L]))]
DD_eligible[, `:=` (logXl_lag =  shift(logXl,type="lag",fill=logXl[1L]))]
DD_eligible[, `:=` (N_lag =  shift(N,type="lag",fill=N[1L]))]
DD_eligible[, `:=` (logN_lag =  shift(logN,type="lag",fill=logN[1L]))]
DD_eligible[, `:=` (pl_lag =  shift(pl,type="lag",fill=pl[1L]))]
DD_eligible[, `:=` (ph_lag =  shift(ph,type="lag",fill=ph[1L]))]
DD_eligible[, `:=` (MRS_lag =  shift(MRS,type="lag",fill=MRS[1L]))]
DD_eligible[, `:=` (logMRS_lag =  shift(logMRS,type="lag",fill=logMRS[1L]))]
DD_eligible[, `:=` (a_lag =  shift(a,type="lag",fill=a[1L]))]


DD_ineligible[, `:=` (xh_lag = shift(xh,type="lag",fill=xh[1L]))]
DD_ineligible[, `:=` (Xh_lag =  shift(Xh,type="lag",fill = Xh[1L]))]
DD_ineligible[, `:=` (logXh_lag =  shift(logXh,type="lag",fill=logXh[1L]))]
DD_ineligible[, `:=` (xl_lag =  shift(xl,type="lag",fill=xl[1L]))]
DD_ineligible[, `:=` (Xl_lag =  shift(Xl,type="lag",fill=Xl[1L]))]
DD_ineligible[, `:=` (logXl_lag =  shift(logXl,type="lag",fill=logXl[1L]))]
DD_ineligible[, `:=` (N_lag =  shift(N,type="lag",fill=N[1L]))]
DD_ineligible[, `:=` (logN_lag =  shift(logN,type="lag",fill=logN[1L]))]
DD_ineligible[, `:=` (pl_lag =  shift(pl,type="lag",fill=pl[1L]))]
DD_ineligible[, `:=` (ph_lag =  shift(ph,type="lag",fill=ph[1L]))]
DD_ineligible[, `:=` (MRS_lag =  shift(MRS,type="lag",fill=MRS[1L]))]
DD_ineligible[, `:=` (logMRS_lag =  shift(logMRS,type="lag",fill=logMRS[1L]))]
DD_ineligible[, `:=` (a_lag =  shift(a,type="lag",fill=a[1L]))]


# Changes in variables
DD[, `:=` (dxh=ifelse(is.na(xh - xh_lag),0,xh - xh_lag))]
DD[, `:=` (dXh=ifelse(is.na(Xh - Xh_lag),0,Xh - Xh_lag))]
DD[, `:=` (dlogXh=ifelse(is.na(logXh - logXh_lag),0,logXh - logXh_lag))]
DD[, `:=` (dxl=ifelse(is.na(xl - xl_lag),0,xl - xl_lag))]
DD[, `:=` (dXl=ifelse(is.na(Xl - Xl_lag),0,Xh - Xl_lag))]
DD[, `:=` (dlogXl=ifelse(is.na(logXl - logXl_lag),0,logXl - logXl_lag))]
DD[, `:=` (dN=ifelse(is.na(N - N_lag),0,N - N_lag))]
DD[, `:=` (dlogN=ifelse(is.na(logN - logN_lag),0,logN - logN_lag))]
DD[, `:=` (dph=ifelse(is.na(ph - ph_lag),0,ph - ph_lag))]
DD[, `:=` (dpl=ifelse(is.na(pl - pl_lag),0,pl - pl_lag))]
DD[, `:=` (dMRS=ifelse(is.na(MRS - MRS_lag),0,MRS - MRS_lag))]
DD[, `:=` (dlogMRS=ifelse(is.na(logMRS - logMRS_lag),0,logMRS - logMRS_lag))]
DD[, `:=` (da=ifelse(is.na(a - a_lag),0,a - a_lag))]

DD_eligible[, `:=` (dxh=ifelse(is.na(xh - xh_lag),0,xh - xh_lag))]
DD_eligible[, `:=` (dXh=ifelse(is.na(Xh - Xh_lag),0,Xh - Xh_lag))]
DD_eligible[, `:=` (dlogXh=ifelse(is.na(logXh - logXh_lag),0,logXh - logXh_lag))]
DD_eligible[, `:=` (dxl=ifelse(is.na(xl - xl_lag),0,xl - xl_lag))]
DD_eligible[, `:=` (dXl=ifelse(is.na(Xl - Xl_lag),0,Xh - Xl_lag))]
DD_eligible[, `:=` (dlogXl=ifelse(is.na(logXl - logXl_lag),0,logXl - logXl_lag))]
DD_eligible[, `:=` (dN=ifelse(is.na(N - N_lag),0,N - N_lag))]
DD_eligible[, `:=` (dlogN=ifelse(is.na(logN - logN_lag),0,logN - logN_lag))]
DD_eligible[, `:=` (dph=ifelse(is.na(ph - ph_lag),0,ph - ph_lag))]
DD_eligible[, `:=` (dpl=ifelse(is.na(pl - pl_lag),0,pl - pl_lag))]
DD_eligible[, `:=` (dMRS=ifelse(is.na(MRS - MRS_lag),0,MRS - MRS_lag))]
DD_eligible[, `:=` (dlogMRS=ifelse(is.na(logMRS - logMRS_lag),0,logMRS - logMRS_lag))]
DD_eligible[, `:=` (da=ifelse(is.na(a - a_lag),0,a - a_lag))]

DD_ineligible[, `:=` (dxh=ifelse(is.na(xh - xh_lag),0,xh - xh_lag))]
DD_ineligible[, `:=` (dXh=ifelse(is.na(Xh - Xh_lag),0,Xh - Xh_lag))]
DD_ineligible[, `:=` (dlogXh=ifelse(is.na(logXh - logXh_lag),0,logXh - logXh_lag))]
DD_ineligible[, `:=` (dxl=ifelse(is.na(xl - xl_lag),0,xl - xl_lag))]
DD_ineligible[, `:=` (dXl=ifelse(is.na(Xl - Xl_lag),0,Xh - Xl_lag))]
DD_ineligible[, `:=` (dlogXl=ifelse(is.na(logXl - logXl_lag),0,logXl - logXl_lag))]
DD_ineligible[, `:=` (dN=ifelse(is.na(N - N_lag),0,N - N_lag))]
DD_ineligible[, `:=` (dlogN=ifelse(is.na(logN - logN_lag),0,logN - logN_lag))]
DD_ineligible[, `:=` (dph=ifelse(is.na(ph - ph_lag),0,ph - ph_lag))]
DD_ineligible[, `:=` (dpl=ifelse(is.na(pl - pl_lag),0,pl - pl_lag))]
DD_ineligible[, `:=` (dMRS=ifelse(is.na(MRS - MRS_lag),0,MRS - MRS_lag))]
DD_ineligible[, `:=` (dlogMRS=ifelse(is.na(logMRS - logMRS_lag),0,logMRS - logMRS_lag))]
DD_ineligible[, `:=` (da=ifelse(is.na(a - a_lag),0,a - a_lag))]



# Moving averages for quantities
MA_param <- 10

DD[,`:=`(xl_MA=movavg(xl,MA_param ,"s") )] #moving average for agency trades proportion
DD[,`:=`(xh_MA=movavg(xh,MA_param ,"s") )] #moving average for risky principal trades proportion
DD[,`:=` (N_MA=movavg(N,MA_param ,"s"))] #moving average of volume
DD[,`:=`(Xh_MA=movavg(Xh,MA_param ,"s") )] #moving average for risky principal trades volume
DD[,`:=`(Xl_MA=movavg(Xl,MA_param ,"s") )] #moving average for risky principal trades volume

DD[, `:=` (xh_MA_lag = shift(xh_MA,type="lag",fill=xh[1L]))]
DD[, `:=` (Xh_MA_lag =  shift(Xh_MA,type="lag",fill = Xh[1L]))]
DD[, `:=` (xl_MA_lag =  shift(xl_MA,type="lag",fill=xl[1L]))]
DD[, `:=` (Xl_MA_lag =  shift(Xl_MA,type="lag",fill=Xl[1L]))]

DD_eligible[,`:=`(xl_MA=movavg(xl,MA_param ,"s") )] #moving average for agency trades proportion
DD_eligible[,`:=`(xh_MA=movavg(xh,MA_param ,"s") )] #moving average for risky principal trades proportion
DD_eligible[,`:=` (N_MA=movavg(N,MA_param ,"s"))] #moving average of volume
DD_eligible[,`:=`(Xh_MA=movavg(Xh,MA_param ,"s") )] #moving average for risky principal trades volume
DD_eligible[,`:=`(Xl_MA=movavg(Xl,MA_param ,"s") )] #moving average for risky principal trades volume

DD_eligible[, `:=` (xh_MA_lag = shift(xh_MA,type="lag",fill=xh[1L]))]
DD_eligible[, `:=` (Xh_MA_lag =  shift(Xh_MA,type="lag",fill = Xh[1L]))]
DD_eligible[, `:=` (xl_MA_lag =  shift(xl_MA,type="lag",fill=xl[1L]))]
DD_eligible[, `:=` (Xl_MA_lag =  shift(Xl_MA,type="lag",fill=Xl[1L]))]


DD_ineligible[,`:=`(xl_MA=movavg(xl,MA_param ,"s") )] #moving average for agency trades proportion
DD_ineligible[,`:=`(xh_MA=movavg(xh,MA_param ,"s") )] #moving average for risky principal trades proportion
DD_ineligible[,`:=` (N_MA=movavg(N,MA_param ,"s"))] #moving average of volume
DD_ineligible[,`:=`(Xh_MA=movavg(Xh,MA_param ,"s") )] #moving average for risky principal trades volume
DD_ineligible[,`:=`(Xl_MA=movavg(Xl,MA_param ,"s") )] #moving average for risky principal trades volume

DD_ineligible[, `:=` (xh_MA_lag = shift(xh_MA,type="lag",fill=xh[1L]))]
DD_ineligible[, `:=` (Xh_MA_lag =  shift(Xh_MA,type="lag",fill = Xh[1L]))]
DD_ineligible[, `:=` (xl_MA_lag =  shift(xl_MA,type="lag",fill=xl[1L]))]
DD_ineligible[, `:=` (Xl_MA_lag =  shift(Xl_MA,type="lag",fill=Xl[1L]))]


# Changes in variables
DD[, `:=` (dxh_MA=xh_MA - xh_MA_lag)]
DD[, `:=` (dXh=Xh_MA - Xh_MA_lag)]
DD[, `:=` (dxl_MA=xl_MA - xl_MA_lag)]
DD[, `:=` (dXl_MA=Xl_MA - Xl_MA_lag)]

DD_eligible[, `:=` (dxh_MA=xh_MA - xh_MA_lag)]
DD_eligible[, `:=` (dXh=Xh_MA - Xh_MA_lag)]
DD_eligible[, `:=` (dxl_MA=xl_MA - xl_MA_lag)]
DD_eligible[, `:=` (dXl_MA=Xl_MA - Xl_MA_lag)]

DD_ineligible[, `:=` (dxh_MA=xh_MA - xh_MA_lag)]
DD_ineligible[, `:=` (dXh=Xh_MA - Xh_MA_lag)]
DD_ineligible[, `:=` (dxl_MA=xl_MA - xl_MA_lag)]
DD_ineligible[, `:=` (dXl_MA=Xl_MA - Xl_MA_lag)]


# Average transaction cost with MA for quantities
DD[, `:=` (a_MA= xl_MA*pl+xh_MA*ph)] #average transaction cost
DD[, `:=` (Da_MA= a_MA - a_MA[1])] # change in average transaction cost

DD_eligible[, `:=` (a_MA= xl_MA*pl+xh_MA*ph)] #average transaction cost
DD_eligible[, `:=` (Da_MA= a_MA - a_MA[1])] # change in average transaction cost

DD_ineligible[, `:=` (a_MA= xl_MA*pl+xh_MA*ph)] #average transaction cost
DD_ineligible[, `:=` (Da_MA= a_MA - a_MA[1])] # change in average transaction cost

# Surplus calculation, no MA for quantities, via Envelope

w_up <- 1 #weight given to the upper bound 
w_lo <- 1-w_up #weight given to the lower bound 

# The upper bound for the change in consumer surplus from one period to the next
DD[, `:=` (ds_up= -xl*dpl-xh*dph)]
DD[, `:=` (Ds_up=cumsum(ds_up))]

DD_eligible[, `:=` (ds_up= -xl*dpl-xh*dph)]
DD_eligible[, `:=` (Ds_up=cumsum(ds_up))]

DD_ineligible[, `:=` (ds_up= -xl*dpl-xh*dph)]
DD_ineligible[, `:=` (Ds_up=cumsum(ds_up))]


# The lower bound for the change in consumer surplus from one period to the next
DD[, `:=` (ds_lo= -xl_lag*dpl-xh_lag*dph)]
DD[, `:=` (Ds_lo=cumsum(ds_lo))]

DD_eligible[, `:=` (ds_lo= -xl_lag*dpl-xh_lag*dph)]
DD_eligible[, `:=` (Ds_lo=cumsum(ds_lo))]

DD_ineligible[, `:=` (ds_lo= -xl_lag*dpl-xh_lag*dph)]
DD_ineligible[, `:=` (Ds_lo=cumsum(ds_lo))]


# We use the convention that level variables are not lagged for calculating changes
# [this means that we use the upper bound]
DD[, `:=` (Ds = w_up*Ds_up+w_lo*Ds_lo) ]
DD[, `:=` (ds = w_up*ds_up+w_lo*Ds_lo) ]

DD_eligible[, `:=` (Ds = w_up*Ds_up+w_lo*Ds_lo) ]
DD_eligible[, `:=` (ds = w_up*ds_up+w_lo*Ds_lo) ]

DD_ineligible[, `:=` (Ds = w_up*Ds_up+w_lo*Ds_lo) ]
DD_ineligible[, `:=` (ds = w_up*ds_up+w_lo*Ds_lo) ]

# Surplus calculation, with MA for quantities, via Envelope

# The upper bound for the change in consumer surplus from one period to the next
DD[, `:=` (ds_MA_up= -xl_MA*dpl-xh_MA*dph)]
DD[, `:=` (Ds_MA_up=cumsum(ds_MA_up))]
#The lower bound for the change in consumer surplus from one period to the next
DD[, `:=` (ds_MA_lo= -xl_MA_lag*dpl-xh_MA_lag*dph)]
DD[, `:=` (Ds_MA_lo=cumsum(ds_MA_lo))]
# We use the convention that level variables are not lagged for calculating changes
# [this means that we use the upper bound]
DD[, `:=` (Ds_MA = w_up*Ds_MA_up+w_lo*Ds_MA_lo) ]
DD[, `:=` (ds_MA = w_up*ds_MA_up+w_lo*Ds_MA_lo) ]




# Change in utility
DD[, `:=` (du = dxl*pl + dxh*ph)]
DD[, `:=` (Du=cumsum(du))]
# Robustness check: used lagged level variables 
DD[, `:=` (du_laglevel =dxl*pl_lag+dxh*ph_lag )]
DD[, `:=` (Du_laglevel=cumsum(du_laglevel))]


# Change in average transaction cost
# Average transaction cost
DD[, `:=` (Da= a - a[1])]


# Change in surplus, logit


# Estimate of sigma
# A is early January to mid February B is mid May to end June



sub_A <- DD[DATE>=ymd("2020-01-15") & DATE<=ymd("2020-02-15"),DATE]
N_A   <- length(sub_A)
mean_price_gap_A <- mean(DD[DATE %in% sub_A,ph-pl])
mean_log_trade_ratio_A <- mean(DD[DATE %in% sub_A,log(xh/xl)])
mean_log_trade_ratio_MA_A <- mean(DD[DATE %in% sub_A,log(xh_MA/xl_MA)])


sub_B <- DD[DATE>=ymd("2020-06-01") & DATE<=ymd("2020-06-30"),DATE]
N_B <- length(sub_B)
mean_price_gap_B <- mean(DD[DATE %in% sub_B,ph-pl])
mean_log_trade_ratio_B <- mean(DD[DATE %in% sub_B,log(xh/xl)])
mean_log_trade_ratio_MA_B <- mean(DD[DATE %in% sub_B,log(xh_MA/xl_MA)])

sigma_bp <- -(mean_price_gap_B-mean_price_gap_A)/(mean_log_trade_ratio_B-mean_log_trade_ratio_A) 
sigma_bp_MA <- -(mean_price_gap_B-mean_price_gap_A)/(mean_log_trade_ratio_MA_B-mean_log_trade_ratio_MA_A) 

# For the picture, the rectangles corresponding to sub period A and B

rectangle_A <- data.frame(xmin = min(sub_A),
                          xmax = max(sub_A),
                          ymin = -Inf, ymax = Inf)
rectangle_B <- data.frame(xmin = min(sub_B),
                          xmax = max(sub_B),
                          ymin = -Inf, ymax = Inf)



# bind together the de-meaned price gap for the two subperiods
de_mean_price_gap_A <- DD[DATE %in% sub_A,ph-pl] -mean_price_gap_A  
de_mean_price_gap_B <- DD[DATE %in% sub_B,ph-pl] -mean_price_gap_B  
de_mean_price_gap   <- c(de_mean_price_gap_A,de_mean_price_gap_B)

#bind together the de-meaned log trade ratio for the two subperiods
de_mean_log_trade_ratio_A <- DD[DATE %in% sub_A,log(xh/xl)] -mean_log_trade_ratio_A  
de_mean_log_trade_ratio_B <- DD[DATE %in% sub_B,log(xh/xl)] -mean_log_trade_ratio_B  
de_mean_log_trade_ratio   <- c(de_mean_log_trade_ratio_A,de_mean_log_trade_ratio_B)

#bind together the de-meaned log trade ratio MA for the two subperiods
de_mean_log_trade_ratio_MA_A <- DD[DATE %in% sub_A,log(xh_MA/xl_MA)] -mean_log_trade_ratio_MA_A  
de_mean_log_trade_ratio_MA_B <- DD[DATE %in% sub_B,log(xh_MA/xl_MA)] -mean_log_trade_ratio_MA_B  
de_mean_log_trade_ratio_MA   <- c(de_mean_log_trade_ratio_MA_A,de_mean_log_trade_ratio_MA_B)


#calculate the sample variance covariance matrix (one can probably do better...)
de_mean_matrix <- as.matrix(rbind(de_mean_price_gap,de_mean_log_trade_ratio))
sample_covar <- de_mean_matrix%*%t(de_mean_matrix)/(N_A+N_B-2)

#same with MA series
de_mean_matrix_MA <- as.matrix(rbind(de_mean_price_gap,de_mean_log_trade_ratio_MA))
sample_covar_MA <- de_mean_matrix_MA%*%t(de_mean_matrix_MA)/(N_A+N_B-2)

#calculate the standard error using the delta method
grad_f1 <- 1/(mean_log_trade_ratio_B-mean_log_trade_ratio_A) 
grad_f2 <- -(mean_price_gap_B-mean_price_gap_A)/(mean_log_trade_ratio_B-mean_log_trade_ratio_A)^2
grad_f <- as.matrix(c(grad_f1,grad_f2))
std_sigma_bp <- sqrt( t(grad_f)%*%sample_covar%*%grad_f*(1/N_A+1/N_B))

#same with MA series
grad_f1_MA <- -1/(mean_log_trade_ratio_MA_B-mean_log_trade_ratio_MA_A) 
grad_f2_MA <- (mean_price_gap_B-mean_price_gap_A)/(mean_log_trade_ratio_MA_B-mean_log_trade_ratio_MA_A)^2
grad_f_MA <- as.matrix(c(grad_f1_MA,grad_f2_MA))
std_sigma_bp_MA <- sqrt( t(grad_f)%*%sample_covar_MA%*%grad_f*(1/N_A+1/N_B))

sigma_bp_hi <- sigma_bp + 2*std_sigma_bp
sigma_bp_lo <- sigma_bp - 2*std_sigma_bp

DD[, `:=` (Ds_MA_logit= - sigma_bp*log(xl_MA/xl_MA[1]) - (pl-pl[1]))]
DD[, `:=` (Ds_MA_logit_hi= - (sigma_bp_hi)*log(xl_MA/xl_MA[1]) - (pl-pl[1]))]
DD[, `:=` (Ds_MA_logit_lo= - (sigma_bp_lo)*log(xl_MA/xl_MA[1]) - (pl-pl[1]))]


DD[, `:=` (dv = sigma_bp*dxh/xh/(1-xh) +xh*dph)]

DD[, `:=` (ds_logit_check= sigma_bp*dxh_MA/(1-xh_MA) - (pl-pl_lag))]
DD[, `:=` (Ds_logit_check= cumsum(ds_logit_check))]

# For the picture, the rectangles corresponding to sub period A and B

rectangle_A <- data.frame(xmin = min(sub_A),
                          xmax = max(sub_A),
                          ymin = -Inf, ymax = Inf)
rectangle_B <- data.frame(xmin = min(sub_B),
                          xmax = max(sub_B),
                          ymin = -Inf, ymax = Inf)



#bind together the de-meaned price gap for the two subperiods
de_mean_price_gap_A <- DD[DATE %in% sub_A,ph-pl] -mean_price_gap_A  
de_mean_price_gap_B <- DD[DATE %in% sub_B,ph-pl] -mean_price_gap_B  
de_mean_price_gap   <- c(de_mean_price_gap_A,de_mean_price_gap_B)

#bind together the de-meaned log trade ratio for the two subperiods
de_mean_log_trade_ratio_A <- DD[DATE %in% sub_A,log(xh/xl)] -mean_log_trade_ratio_A  
de_mean_log_trade_ratio_B <- DD[DATE %in% sub_B,log(xh/xl)] -mean_log_trade_ratio_B  
de_mean_log_trade_ratio   <- c(de_mean_log_trade_ratio_A,de_mean_log_trade_ratio_B)

#bind together the de-meaned log trade ratio MA for the two subperiods
de_mean_log_trade_ratio_MA_A <- DD[DATE %in% sub_A,log(xh_MA/xl_MA)] -mean_log_trade_ratio_MA_A  
de_mean_log_trade_ratio_MA_B <- DD[DATE %in% sub_B,log(xh_MA/xl_MA)] -mean_log_trade_ratio_MA_B  
de_mean_log_trade_ratio_MA   <- c(de_mean_log_trade_ratio_MA_A,de_mean_log_trade_ratio_MA_B)


#calculate the sample variance covariance matrix (one can probably do better...)
de_mean_matrix <- as.matrix(rbind(de_mean_price_gap,de_mean_log_trade_ratio))
sample_covar <- de_mean_matrix%*%t(de_mean_matrix)/(N_A+N_B-2)

#same with MA series
de_mean_matrix_MA <- as.matrix(rbind(de_mean_price_gap,de_mean_log_trade_ratio_MA))
sample_covar_MA <- de_mean_matrix_MA%*%t(de_mean_matrix_MA)/(N_A+N_B-2)

#calculate the standard error using the delta method
grad_f1 <- 1/(mean_log_trade_ratio_B-mean_log_trade_ratio_A) 
grad_f2 <- -(mean_price_gap_B-mean_price_gap_A)/(mean_log_trade_ratio_B-mean_log_trade_ratio_A)^2
grad_f <- as.matrix(c(grad_f1,grad_f2))
std_sigma_bp <- sqrt( t(grad_f)%*%sample_covar%*%grad_f*(1/N_A+1/N_B))

#same with MA series
grad_f1_MA <- -1/(mean_log_trade_ratio_MA_B-mean_log_trade_ratio_MA_A) 
grad_f2_MA <- (mean_price_gap_B-mean_price_gap_A)/(mean_log_trade_ratio_MA_B-mean_log_trade_ratio_MA_A)^2
grad_f_MA <- as.matrix(c(grad_f1_MA,grad_f2_MA))
std_sigma_bp_MA <- sqrt( t(grad_f)%*%sample_covar_MA%*%grad_f*(1/N_A+1/N_B))

sigma_bp_hi <- sigma_bp + 2*std_sigma_bp
sigma_bp_lo <- sigma_bp - 2*std_sigma_bp


#Decomposition of change in surplus, logit
#Transaction costs
DD[, `:=` (ds_logit_cost = -da)]
DD[, `:=` (Ds_logit_cost = -a+a[1])]
#Composition
DD[, `:=` (ds_logit_comp = dxh_MA*(ph-pl))]
DD[, `:=` (Ds_logit_comp = cumsum(ds_logit_comp))]
#Preference
DD[, `:=` (ds_logit_pref = sigma_bp*dxh_MA/(1-xh_MA) + (dph-dpl)*xh_MA)]
DD[, `:=` (Ds_logit_pref = cumsum(ds_logit_pref))]
DD[, `:=` (ds_logit_pref_hi = sigma_bp_hi*dxh_MA/(1-xh_MA) + (dph-dpl)*xh_MA)]
DD[, `:=` (Ds_logit_pref_hi = cumsum(ds_logit_pref_hi))]
DD[, `:=` (ds_logit_pref_lo = sigma_bp_lo*dxh_MA/(1-xh_MA) + (dph-dpl)*xh_MA)]
DD[, `:=` (Ds_logit_pref_lo = cumsum(ds_logit_pref_lo))]




#Change in price gap

DD[, `:=` (Dprice_gap = (ph-pl)-(ph[1]-pl[1]) )]
DD[, `:=` (Dprice_gap_along_demand = -sigma_bp*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD[, `:=` (Dprice_gap_along_demand_80 = -80.06*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD[, `:=` (Dprice_gap_along_demand_70_17 = -70.17*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD[, `:=` (Dprice_gap_along_demand_73_31 = -73.31*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD[, `:=` (Dprice_gap_shifter = Dprice_gap- Dprice_gap_along_demand)]
DD[, `:=` (Dprice_gap_shifter_80 = Dprice_gap- Dprice_gap_along_demand_80)]
DD[, `:=` (Dprice_gap_shifter_70_17 = Dprice_gap- Dprice_gap_along_demand_70_17)]
DD[, `:=` (Dprice_gap_shifter_73_31 = Dprice_gap- Dprice_gap_along_demand_73_31)]

DD_eligible[, `:=` (Dprice_gap = (ph-pl)-(ph[1]-pl[1]) )]
DD_eligible[, `:=` (Dprice_gap_along_demand = -sigma_bp*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD_eligible[, `:=` (Dprice_gap_along_demand2 = -37.08*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD_eligible[, `:=` (Dprice_gap_shifter = Dprice_gap- Dprice_gap_along_demand)]
DD_eligible[, `:=` (Dprice_gap_shifter2 = Dprice_gap- Dprice_gap_along_demand2)]

DD_ineligible[, `:=` (Dprice_gap = (ph-pl)-(ph[1]-pl[1]) )]
DD_ineligible[, `:=` (Dprice_gap_along_demand = -sigma_bp*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD_ineligible[, `:=` (Dprice_gap_along_demand2 = -130.37*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD_ineligible[, `:=` (Dprice_gap_shifter = Dprice_gap- Dprice_gap_along_demand)]
DD_ineligible[, `:=` (Dprice_gap_shifter2 = Dprice_gap- Dprice_gap_along_demand2)]


DD[, `:=` (Dprice_gap_along_demand_hi = -(sigma_bp_hi)*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD[, `:=` (Dprice_gap_along_demand_lo = -(sigma_bp_lo)*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]

DD_eligible[, `:=` (Dprice_gap_along_demand_hi = -(sigma_bp_hi)*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD_eligible[, `:=` (Dprice_gap_along_demand_lo = -(sigma_bp_lo)*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]

DD_ineligible[, `:=` (Dprice_gap_along_demand_hi = -(sigma_bp_hi)*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD_ineligible[, `:=` (Dprice_gap_along_demand_lo = -(sigma_bp_lo)*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]


rectangle_A <- data.frame(xmin = min(sub_A),
                          xmax = max(sub_A),
                          ymin = -Inf, ymax = Inf)
rectangle_B <- data.frame(xmin = min(sub_B),
                          xmax = max(sub_B),
                          ymin = -Inf, ymax = Inf)

plot <- ggplot() +
  geom_line(data = DD_eligible[as.Date(DATE)>=start_date_pic & as.Date(DATE)<=end_date_pic],
            aes(x=as.Date(DATE),y=Dprice_gap_shifter,color="eligible",,linetype="eligible"), size = 1.4) +
  geom_line(data = DD_ineligible[as.Date(DATE)>=start_date_pic & as.Date(DATE)<=end_date_pic],
            aes(x=as.Date(DATE),y=Dprice_gap_shifter,color="ineligible",linetype="ineligible"), size = 1.4) + 
  xlab("") + ylab("Change in relative demand shocks (bps)") + theme_bw() +
  geom_text(aes(x=as.Date("2020-02-19"), label="stock market peak", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-05"), label="stock market trough", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-18"), label="PDCF", y = 25, vjust=-0.3), colour="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-23"), label="P/SMCCF announced", y = 25, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-04-09"), label="P/SMCCF expanded", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-05-12"), label="SMCCF began buying ETFs", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-06-16"), label="SMCCF began buying bonds", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-06-29"), label="PMCCF began operating", y = 100, vjust=-0.3), color="gray40", angle=90) +
  scale_colour_manual("",breaks = c("eligible","ineligible"),
                      values = c("eligible"=mahyarblue,"ineligible"=mahyarred),
                      labels = c("eligible bonds","ineligible bonds")) +
  scale_linetype_manual("",breaks = c("eligible","ineligible"),
                        values = c("eligible"=1,"ineligible"=12),
                        labels = c("eligible bonds","ineligible bonds")) +
  theme(axis.text.x = element_text(size=ftxtsize,face="plain"), axis.text.y = element_text(size=ftxtsize,face="plain"),
        axis.title.x = element_text(size=ftxtsize,face="plain"), axis.title.y = element_text(size=ftxtsize,face="plain"),
        legend.text = element_text(size=ftxtsize,face="plain")) + 
  theme(legend.key.size =  unit(0.64, "in"),legend.key.height =  unit(0.2, "in"),
        legend.key.width =  unit(0.4, "in"),legend.position="bottom",
        legend.spacing.x = unit(.3, 'cm'),legend.margin=margin(t=-10))+ 
  theme(axis.text.x = element_text(angle = 45, vjust = 0.6)) + 
  geom_vline(xintercept=X_axis_breaks_EOD, color="gray45", size=0.6,linetype=2) +
  scale_x_date(breaks = as.Date(X_axis_breaks_EOD) , labels = X_axis_labels_EOD) + 
  geom_rect(data = rectangle_A, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
            fill = "darkseagreen4", alpha = 0.2) + 
  geom_rect(data = rectangle_B, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
            fill = "darkseagreen4", alpha = 0.2)
print(plot)

#### Figure A.1 ####
plot <- ggplot() +
  geom_line(data = DD[as.Date(DATE)>=start_date_pic & as.Date(DATE)<=end_date_pic],
            aes(x=as.Date(DATE),y=Dprice_gap_shifter,color="sigma100",linetype="sigma100"), size = 1.4) +
  geom_line(data = DD[as.Date(DATE)>=start_date_pic & as.Date(DATE)<=end_date_pic],
            aes(x=as.Date(DATE),y=Dprice_gap_shifter_80,color="sigma80",linetype="sigma80"), size = 1.2) +
  geom_line(data = DD[as.Date(DATE)>=start_date_pic & as.Date(DATE)<=end_date_pic],
            aes(x=as.Date(DATE),y=Dprice_gap_shifter_70_17,color="sigma70_17",linetype="sigma70_17"), size = 1.2) +
  geom_line(data = DD[as.Date(DATE)>=start_date_pic & as.Date(DATE)<=end_date_pic],
            aes(x=as.Date(DATE),y=Dprice_gap_shifter_73_31,color="sigma73_31",linetype="sigma73_31"), size = 1.2) + 
  xlab("") + ylab("Change in realtive demand shocks (bps)") + theme_bw() +
  geom_text(aes(x=as.Date("2020-02-19"), label="stock market peak", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-05"), label="stock market trough", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-18"), label="PDCF", y = 25, vjust=-0.3), colour="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-03-23"), label="P/SMCCF announced", y = 25, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-04-09"), label="P/SMCCF expanded", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-05-12"), label="SMCCF began buying ETFs", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-06-16"), label="SMCCF began buying bonds", y = 100, vjust=-0.3), color="gray40", angle=90) +
  geom_text(aes(x=as.Date("2020-06-29"), label="PMCCF began operating", y = 100, vjust=-0.3), color="gray40", angle=90) +
  scale_colour_manual("",breaks = c("sigma100","sigma80","sigma70_17","sigma73_31"),
                      values = c("sigma100"=mahyarblue,"sigma80"=mahyarred,"sigma70_17"=mahyargreen,"sigma73_31"="orange"),
                      labels = c("sigma = 100.09","sigma = 80.06","sigma = 70.17","sigma = 73.31")) +
  scale_linetype_manual("",breaks = c("sigma100","sigma80","sigma70_17","sigma73_31"),
                        values = c("sigma100"=1,"sigma80"=2,"sigma70_17"=3,"sigma73_31"=4),
                        labels = c("sigma = 100.09","sigma = 80.06","sigma = 70.17","sigma = 73.31")) +
  theme(axis.text.x = element_text(size=ftxtsize,face="plain"), axis.text.y = element_text(size=ftxtsize,face="plain"),
        axis.title.x = element_text(size=ftxtsize,face="plain"), axis.title.y = element_text(size=ftxtsize,face="plain"),
        legend.text = element_text(size=ftxtsize,face="plain")) + 
  theme(legend.key.size =  unit(0.64, "in"),legend.key.height =  unit(0.2, "in"),
        legend.key.width =  unit(0.4, "in"),legend.position="bottom",
        legend.spacing.x = unit(.3, 'cm'),legend.margin=margin(t=-10))+ 
  theme(axis.text.x = element_text(angle = 45, vjust = 0.6)) + 
  geom_vline(xintercept=X_axis_breaks_EOD, color="gray45", size=0.6,linetype=2) +
  scale_x_date(breaks = as.Date(X_axis_breaks_EOD) , labels = X_axis_labels_EOD)
print(plot)

#### Figure 9 ####
#Start and end date for the calculations
start_date_cal <- ymd("20-01-01")
end_date_cal   <- ymd("20-08-28")

#Start and end date for the pictures  
start_date_pic <- ymd("20-01-19")
end_date_pic  <- ymd("20-07-01")
# end_date_pic <- end_date_cal

#Set the date breaks for the pictures
X_axis_breaks_EOD<-c(start_date_pic,as.Date(c("2019-12-19","2020-01-19","2020-02-19","2020-03-05","2020-03-18","2020-03-23", "2020-04-09","2020-05-12","2020-06-05")))
X_axis_labels_EOD<-format(X_axis_breaks_EOD,format = "%b-%d")



summ_stats_EOD <- fread(paste0(current_path,"/data/wrds_summary_statistics/logit_estimation_data/SumStats_combine_TRACE_standard_EOD_Jan_June.csv"))

#Change date format
summ_stats_EOD[,DATE:=ymd(DATE)]

#Remove weekends and holidays
all_DATES <- seq(min(summ_stats_EOD$DATE),max(summ_stats_EOD$DATE),by="1 day")
indic_B <- isBusinessDay(calendar="UnitedStates",all_DATES)
all_B_DATES<-all_DATES[indic_B]
summ_stats_EOD <- summ_stats_EOD[DATE %in% all_B_DATES]
#Extract three time series from Shuo's file: CH, MIRC and prop of agencies, our proxies for ph, pl, and xl

DD <- summ_stats_EOD[,c("DATE","ph","pl","xl","daily_netinflow_vol","daily_vol_customer_sold")]
old_names = c("daily_netinflow_vol")
new_names <- c("Delta_I")
setnames(DD,old_names,new_names)
#set the key for merging
setkey(DD,"DATE")
DD_complete <- DD


#Extract the volume of "dealer buy from costumer" from the market sentiment series, our proxy for N
market_sentiment <- fread(paste0(current_path,"/market_sentiment/merged_market_sentiment.csv"))
DT <-  market_sentiment[,c("DATE","TotalVolume_DealerBuyFromCustomer_AllSecurities")]
old_names <- c("DATE","TotalVolume_DealerBuyFromCustomer_AllSecurities")
new_names <- c("DATE","N")
setnames(DT,old_names,new_names)
#Set key for merging
setkey(DT,"DATE")
DT[,`:=` (DATE=as.Date(DATE))]

#Merge the two data sets
DD <- merge(DD,DT,all.x=TRUE)

DDprior <- DD[DATE>=ymd("2016-01-01") & DATE<=start_date_cal] 
DDprior[, `:=` (N=na.approx(N))] #in 2019 there are missing data for volume
DD<- DD[DATE>=start_date_cal & DATE<=end_date_cal]
DD[, `:=` (N=na.approx(N))] #in 2019 there are missing data for volume



#Compares mean 2020 with prior years
DD19mean<-DDprior[,lapply(.SD,mean),by=month(DATE)]
DD19mean[,month:=month(month,label=TRUE)]
DD19mean[,period:="2016-2019"]

DD20mean<-DD[,lapply(.SD,mean),by=month(DATE)]
DD20mean[,month:=month(month,label=TRUE)]
DD20mean[,period:="2020"]

DDmean<-rbind(DD19mean,DD20mean)

#New variables 
DD[, `:=` (xh=1-xl)] #proportion of risky principal trades
DD[, `:=` (Xh=N*xh)] #dollar volume of risky principal trades
DD[, `:=` (logXh = log(Xh))] #log dollar volume of risky principal trades
DD[, `:=` (Xl=N*xl)] #dollar volume of agency trades
DD[, `:=` (logXl = log(Xl))] #log dollar volume of agnecy trades
DD[, `:=` (logN = log(N))] #log dollar volume
DD[, `:=` (MRS=ph/pl)] #relative price of risky principal to agency trades
DD[, `:=` (logMRS = log(MRS))] #log relative price
DD[, `:=` (a= xl*pl+(1-xl)*ph)] #average transaction cost
DD[, `:=` (c= xh*(ph-pl))] #average cost of immediacy
#Lagged variables
DD[, `:=` (xh_lag = shift(xh,type="lag",fill=xh[1L]))]
DD[, `:=` (Xh_lag =  shift(Xh,type="lag",fill = Xh[1L]))]
DD[, `:=` (logXh_lag =  shift(logXh,type="lag",fill=logXh[1L]))]
DD[, `:=` (xl_lag =  shift(xl,type="lag",fill=xl[1L]))]
DD[, `:=` (Xl_lag =  shift(Xl,type="lag",fill=Xl[1L]))]
DD[, `:=` (logXl_lag =  shift(logXl,type="lag",fill=logXl[1L]))]
DD[, `:=` (N_lag =  shift(N,type="lag",fill=N[1L]))]
DD[, `:=` (logN_lag =  shift(logN,type="lag",fill=logN[1L]))]
DD[, `:=` (pl_lag =  shift(pl,type="lag",fill=pl[1L]))]
DD[, `:=` (ph_lag =  shift(ph,type="lag",fill=ph[1L]))]
DD[, `:=` (MRS_lag =  shift(MRS,type="lag",fill=MRS[1L]))]
DD[, `:=` (logMRS_lag =  shift(logMRS,type="lag",fill=logMRS[1L]))]
DD[, `:=` (a_lag =  shift(a,type="lag",fill=a[1L]))]
DD[, `:=` (c_lag =  shift(c,type="lag",fill=c[1L]))]

#Changes in variables
DD[, `:=` (dxh=ifelse(is.na(xh - xh_lag),0,xh - xh_lag))]
DD[, `:=` (dXh=ifelse(is.na(Xh - Xh_lag),0,Xh - Xh_lag))]
DD[, `:=` (dlogXh=ifelse(is.na(logXh - logXh_lag),0,logXh - logXh_lag))]
DD[, `:=` (dxl=ifelse(is.na(xl - xl_lag),0,xl - xl_lag))]
DD[, `:=` (dXl=ifelse(is.na(Xl - Xl_lag),0,Xh - Xl_lag))]
DD[, `:=` (dlogXl=ifelse(is.na(logXl - logXl_lag),0,logXl - logXl_lag))]
DD[, `:=` (dN=ifelse(is.na(N - N_lag),0,N - N_lag))]
DD[, `:=` (dlogN=ifelse(is.na(logN - logN_lag),0,logN - logN_lag))]
DD[, `:=` (dph=ifelse(is.na(ph - ph_lag),0,ph - ph_lag))]
DD[, `:=` (dpl=ifelse(is.na(pl - pl_lag),0,pl - pl_lag))]
DD[, `:=` (dMRS=ifelse(is.na(MRS - MRS_lag),0,MRS - MRS_lag))]
DD[, `:=` (dlogMRS=ifelse(is.na(logMRS - logMRS_lag),0,logMRS - logMRS_lag))]
DD[, `:=` (da=ifelse(is.na(a - a_lag),0,a - a_lag))]
DD[, `:=` (dc=ifelse(is.na(c - c_lag),0,c - c_lag))]
# Moving averages for quantities
MA_param <- 10

DD[,`:=`(xl_MA=movavg(xl,MA_param ,"s") )] #moving average for agency trades proportion
DD[,`:=`(xh_MA=movavg(xh,MA_param ,"s") )] #moving average for risky principal trades proportion
DD[,`:=` (N_MA=movavg(N,MA_param ,"s"))] #moving average of volume
DD[,`:=`(Xh_MA=movavg(Xh,MA_param ,"s") )] #moving average for risky principal trades volume
DD[,`:=`(Xl_MA=movavg(Xl,MA_param ,"s") )] #moving average for risky principal trades volume

DD[, `:=` (xh_MA_lag = shift(xh_MA,type="lag",fill=xh[1L]))]
DD[, `:=` (Xh_MA_lag =  shift(Xh_MA,type="lag",fill = Xh[1L]))]
DD[, `:=` (xl_MA_lag =  shift(xl_MA,type="lag",fill=xl[1L]))]
DD[, `:=` (Xl_MA_lag =  shift(Xl_MA,type="lag",fill=Xl[1L]))]


#Changes in variables
DD[, `:=` (dxh_MA=xh_MA - xh_MA_lag)]
DD[, `:=` (dXh=Xh_MA - Xh_MA_lag)]
DD[, `:=` (dxl_MA=xl_MA - xl_MA_lag)]
DD[, `:=` (dXl_MA=Xl_MA - Xl_MA_lag)]


#Average transaction cost with MA for quantities
DD[, `:=` (a_MA= xl_MA*pl+xh_MA*ph)] #average transaction cost
DD[, `:=` (c_MA= xh_MA*(ph-pl))]
DD[, `:=` (Dc_MA= c_MA - c_MA[1])] # change in average transaction cost

#Surplus calculation, no MA for quantities, via Envelope

w_up <- 1 #weight given to the upper bound 
w_lo <- 1-w_up #weight given to the lower bound 

#The upper bound for the change in consumer surplus from one period to the next
DD[, `:=` (ds_up= -xl*dpl-xh*dph)]
DD[, `:=` (Ds_up=cumsum(ds_up))]
#The lower bound for the change in consumer surplus from one period to the next
DD[, `:=` (ds_lo= -xl_lag*dpl-xh_lag*dph)]
DD[, `:=` (Ds_lo=cumsum(ds_lo))]
#We use the convention that level variables are not lagged for calculating changes
#[this means that we use the upper bound]
DD[, `:=` (Ds = w_up*Ds_up+w_lo*Ds_lo) ]
DD[, `:=` (ds = w_up*ds_up+w_lo*Ds_lo) ]

#Surplus calculation, with MA for quantities, via Envelope

#The upper bound for the change in consumer surplus from one period to the next
DD[, `:=` (ds_MA_up= -xl_MA*dpl-xh_MA*dph)]
DD[, `:=` (Ds_MA_up=cumsum(ds_MA_up))]
#The lower bound for the change in consumer surplus from one period to the next
DD[, `:=` (ds_MA_lo= -xl_MA_lag*dpl-xh_MA_lag*dph)]
DD[, `:=` (Ds_MA_lo=cumsum(ds_MA_lo))]
#We use the convention that level variables are not lagged for calculating changes
#[this means that we use the upper bound]
DD[, `:=` (Ds_MA = w_up*Ds_MA_up+w_lo*Ds_MA_lo) ]
DD[, `:=` (ds_MA = w_up*ds_MA_up+w_lo*Ds_MA_lo) ]


#Change in utility
DD[, `:=` (du = dxl*pl + dxh*ph)]
DD[, `:=` (Du=cumsum(du))]
#Robustness check: used lagged level variables 
DD[, `:=` (du_laglevel =dxl*pl_lag+dxh*ph_lag )]
DD[, `:=` (Du_laglevel=cumsum(du_laglevel))]


#Change in average transaction cost
#Average transaction cost
DD[, `:=` (Da= a - a[1])]




#Estimate of sigma
#A is early January to mid February B is mid May to end June


sub_A <- DD[DATE>=ymd("2020-01-15") & DATE<=ymd("2020-02-15"),DATE]
N_A   <- length(sub_A)
mean_price_gap_A <- mean(DD[DATE %in% sub_A,ph-pl])
mean_log_trade_ratio_A <- mean(DD[DATE %in% sub_A,log(xh/xl)])
mean_log_trade_ratio_MA_A <- mean(DD[DATE %in% sub_A,log(xh_MA/xl_MA)])


sub_B <- DD[DATE>=ymd("2020-06-01") & DATE<=ymd("2020-06-30"),DATE]
N_B <- length(sub_B)
mean_price_gap_B <- mean(DD[DATE %in% sub_B,ph-pl])
mean_log_trade_ratio_B <- mean(DD[DATE %in% sub_B,log(xh/xl)])
mean_log_trade_ratio_MA_B <- mean(DD[DATE %in% sub_B,log(xh_MA/xl_MA)])

sigma_bp <- -(mean_price_gap_B-mean_price_gap_A)/(mean_log_trade_ratio_B-mean_log_trade_ratio_A) 
sigma_bp_MA <- -(mean_price_gap_B-mean_price_gap_A)/(mean_log_trade_ratio_MA_B-mean_log_trade_ratio_MA_A) 

# sigma_bp <- 100.0924
# sigma_bp_MA <- 100.5234

# For the picture, the rectangles corresponding to sub period A and B

rectangle_A <- data.frame(xmin = min(sub_A),
                          xmax = max(sub_A),
                          ymin = -Inf, ymax = Inf)
rectangle_B <- data.frame(xmin = min(sub_B),
                          xmax = max(sub_B),
                          ymin = -Inf, ymax = Inf)



#bind together the de-meaned price gap for the two subperiods
de_mean_price_gap_A <- DD[DATE %in% sub_A,ph-pl] -mean_price_gap_A  
de_mean_price_gap_B <- DD[DATE %in% sub_B,ph-pl] -mean_price_gap_B  
de_mean_price_gap   <- c(de_mean_price_gap_A,de_mean_price_gap_B)

#bind together the de-meaned log trade ratio for the two subperiods
de_mean_log_trade_ratio_A <- DD[DATE %in% sub_A,log(xh/xl)] -mean_log_trade_ratio_A  
de_mean_log_trade_ratio_B <- DD[DATE %in% sub_B,log(xh/xl)] -mean_log_trade_ratio_B  
de_mean_log_trade_ratio   <- c(de_mean_log_trade_ratio_A,de_mean_log_trade_ratio_B)

#bind together the de-meaned log trade ratio MA for the two subperiods
de_mean_log_trade_ratio_MA_A <- DD[DATE %in% sub_A,log(xh_MA/xl_MA)] -mean_log_trade_ratio_MA_A  
de_mean_log_trade_ratio_MA_B <- DD[DATE %in% sub_B,log(xh_MA/xl_MA)] -mean_log_trade_ratio_MA_B  
de_mean_log_trade_ratio_MA   <- c(de_mean_log_trade_ratio_MA_A,de_mean_log_trade_ratio_MA_B)


#calculate the sample variance covariance matrix (one can probably do better...)
de_mean_matrix <- as.matrix(rbind(de_mean_price_gap,de_mean_log_trade_ratio))
sample_covar <- de_mean_matrix%*%t(de_mean_matrix)/(N_A+N_B-2)

#same with MA series
de_mean_matrix_MA <- as.matrix(rbind(de_mean_price_gap,de_mean_log_trade_ratio_MA))
sample_covar_MA <- de_mean_matrix_MA%*%t(de_mean_matrix_MA)/(N_A+N_B-2)

#calculate the standard error using the delta method
grad_f1 <- 1/(mean_log_trade_ratio_B-mean_log_trade_ratio_A) 
grad_f2 <- -(mean_price_gap_B-mean_price_gap_A)/(mean_log_trade_ratio_B-mean_log_trade_ratio_A)^2
grad_f <- as.matrix(c(grad_f1,grad_f2))
std_sigma_bp <- sqrt( t(grad_f)%*%sample_covar%*%grad_f*(1/N_A+1/N_B))

#same with MA series
grad_f1_MA <- -1/(mean_log_trade_ratio_MA_B-mean_log_trade_ratio_MA_A) 
grad_f2_MA <- (mean_price_gap_B-mean_price_gap_A)/(mean_log_trade_ratio_MA_B-mean_log_trade_ratio_MA_A)^2
grad_f_MA <- as.matrix(c(grad_f1_MA,grad_f2_MA))
std_sigma_bp_MA <- sqrt( t(grad_f)%*%sample_covar_MA%*%grad_f*(1/N_A+1/N_B))

sigma_bp_hi <- sigma_bp + 2*std_sigma_bp
sigma_bp_lo <- sigma_bp - 2*std_sigma_bp

#The logit surplus calculated here is for "immediacy" only (i.e. uprgrade to risky principal)
DD[, `:=` (Ds_MA_logit= - sigma_bp*log(xl_MA/xl_MA[1]))]
DD[, `:=` (Ds_MA_logit_hi= - (sigma_bp_hi)*log(xl_MA/xl_MA[1]))]
DD[, `:=` (Ds_MA_logit_lo= - (sigma_bp_lo)*log(xl_MA/xl_MA[1]))]


#The logit surplus calculated numerically as cumulated derivative 
DD[, `:=` (ds_MA_logit_check= sigma_bp*dxh_MA/(1-xh_MA))]
DD[, `:=` (Ds_MA_logit_check= cumsum(ds_MA_logit_check))]
#Decomposition of change in surplus, logit
DD[, `:=` (ds_logit_pref = sigma_bp*dxh/(1-xh) + (dph-dpl)*xh)]
DD[, `:=` (Ds_logit_pref = cumsum(ds_logit_pref))]

#Decomposition of change in surplus, logit
#Transaction costs
#DD[, `:=` (ds_MA_logit_cost_1 = -dc_MA]
DD[, `:=` (Ds_MA_logit_cost_1 = -c_MA+c_MA[1])]
#Composition
DD[, `:=` (ds_MA_logit_comp_1 = dxh_MA*(ph-pl))]
DD[, `:=` (Ds_MA_logit_comp_1 = cumsum(ds_MA_logit_comp_1))]
#Preference
DD[, `:=` (ds_MA_logit_pref_1 = sigma_bp*dxh_MA/(1-xh_MA) + (dph-dpl)*xh_MA)]
DD[, `:=` (Ds_MA_logit_pref_1 = cumsum(ds_MA_logit_pref_1))]
DD[, `:=` (ds_MA_logit_pref_hi_1 = sigma_bp_hi*dxh_MA/(1-xh_MA) + (dph-dpl)*xh_MA)]
DD[, `:=` (Ds_MA_logit_pref_hi_1 = cumsum(ds_MA_logit_pref_hi_1))]
DD[, `:=` (ds_MA_logit_pref_lo_1 = sigma_bp_lo*dxh_MA/(1-xh_MA) + (dph-dpl)*xh_MA)]
DD[, `:=` (Ds_MA_logit_pref_lo_1 = cumsum(ds_MA_logit_pref_lo_1))]


#Change in price gap

DD[, `:=` (Dprice_gap = (ph-pl)-(ph[1]-pl[1]) )]
DD[, `:=` (Dprice_gap_along_demand = -sigma_bp*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD[, `:=` (Dprice_gap_shifter = Dprice_gap- Dprice_gap_along_demand)]


DD[, `:=` (Dprice_gap_along_demand_hi = -(sigma_bp_hi)*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]
DD[, `:=` (Dprice_gap_along_demand_lo = -(sigma_bp_lo)*(log(xh_MA/xl_MA)-log(xh_MA[1]/xl_MA[1])))]


#The cost component of the logit surplus
DD[, `:=` (Ds_MA_logit_cost_2= -c_MA+c_MA[1])]
#The preference shock component of the logit surplus
DD[, `:=` (Ds_MA_logit_pref_2= Dprice_gap_shifter*xh_MA)]
#The composition component of the logit surplus
DD[, `:=` (Ds_MA_logit_comp_2 = Ds_MA_logit-Ds_MA_logit_cost_2-Ds_MA_logit_pref_2)]




series_label <- c("change in logit surplus for immediacy")
plot <- ggplot() +
  geom_line(data = DD[as.Date(DATE)>=start_date_pic & as.Date(DATE)<=end_date_pic ],aes(x=as.Date(DATE),y=Ds_MA_logit),size = 1.4, color=mahyarblue) +
  geom_ribbon(data = DD[as.Date(DATE)>=start_date_pic & as.Date(DATE)<=end_date_pic ],aes(x=as.Date(DATE),ymin = Ds_MA_logit_lo, ymax = Ds_MA_logit_hi), alpha= 0.2,fill = "grey70")+
  xlab("") + ylab("change in surplus from immediacy (bps)") + theme_bw() +
  scale_colour_manual("",breaks = c("s"),
                      values = c("s"=mahyarblue),
                      labels = series_label) +
  scale_linetype_manual("",breaks = c("s"),
                        values = c("s"=1),
                        labels = series_label) + 
  theme(axis.text.x = element_text(size=ftxtsize,face="plain"), axis.text.y = element_text(size=ftxtsize,face="plain"),
        axis.title.x = element_text(size=ftxtsize,face="plain"), axis.title.y = element_text(size=ftxtsize,face="plain"),
        legend.text = element_text(size=ftxtsize,face="plain")) + 
  theme(legend.key.size =  unit(0.64, "in"),legend.key.height =  unit(0.2, "in"),
        legend.key.width =  unit(0.4, "in"),legend.position="bottom",
        legend.spacing.x = unit(.6, 'cm'),legend.margin=margin(t=-10))+ 
  theme(axis.text.x = element_text(angle = 45, vjust = 0.6)) + 
  geom_vline(xintercept=X_axis_breaks_EOD, color="gray45", size=0.6,linetype=2) +
  scale_x_date(breaks = as.Date(X_axis_breaks_EOD) , labels = X_axis_labels_EOD) + 
  geom_rect(data = rectangle_A, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
            fill = "darkseagreen4", alpha = 0.2) + 
  geom_rect(data = rectangle_B, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
            fill = "darkseagreen4", alpha = 0.2)
print(plot)
